pacman::p_load(olsrr, corrplot, tidyverse, ggpubr, sf, spdep, GWmodel, tmap, gtsummary)Calibrating Hedonic Pricing Model for Private Highrise Property with GWR Method
Overview
This exercise is a revision of the Hands-on Exercise 4.
Getting Started
First, we need to load the necessary packages.
Now we import the geospatial data.
mpsz = st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL") %>%
st_transform(crs = 3414)Reading layer `MP14_SUBZONE_WEB_PL' from data source
`/Users/sylvia/sylvie-le/ISSS624/In-class_Ex/In-class_Ex4/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
Now we import the aspatial data. At the same time, we also transform it into SVY21 projected coordinate system.
condo_resale.sf = read_csv("data/aspatial/Condo_resale_2015.csv") %>%
st_as_sf(coords = c('LONGITUDE', 'LATITUDE'), crs = 4326) %>%
st_transform(crs = 3414)Exploratory Data Analysis
EDA using summary
First, we will look at the variables in condo_resale.sf file for their distribution.
summary(condo_resale.sf) POSTCODE SELLING_PRICE AREA_SQM AGE
Min. : 18965 Min. : 540000 Min. : 34.0 Min. : 0.00
1st Qu.:259849 1st Qu.: 1100000 1st Qu.:103.0 1st Qu.: 5.00
Median :469298 Median : 1383222 Median :121.0 Median :11.00
Mean :440439 Mean : 1751211 Mean :136.5 Mean :12.14
3rd Qu.:589486 3rd Qu.: 1950000 3rd Qu.:156.0 3rd Qu.:18.00
Max. :828833 Max. :18000000 Max. :619.0 Max. :37.00
PROX_CBD PROX_CHILDCARE PROX_ELDERLYCARE PROX_URA_GROWTH_AREA
Min. : 0.3869 Min. :0.004927 Min. :0.05451 Min. :0.2145
1st Qu.: 5.5574 1st Qu.:0.174481 1st Qu.:0.61254 1st Qu.:3.1643
Median : 9.3567 Median :0.258135 Median :0.94179 Median :4.6186
Mean : 9.3254 Mean :0.326313 Mean :1.05351 Mean :4.5981
3rd Qu.:12.6661 3rd Qu.:0.368293 3rd Qu.:1.35122 3rd Qu.:5.7550
Max. :19.1804 Max. :3.465726 Max. :3.94916 Max. :9.1554
PROX_HAWKER_MARKET PROX_KINDERGARTEN PROX_MRT PROX_PARK
Min. :0.05182 Min. :0.004927 Min. :0.05278 Min. :0.02906
1st Qu.:0.55245 1st Qu.:0.276345 1st Qu.:0.34646 1st Qu.:0.26211
Median :0.90842 Median :0.413385 Median :0.57430 Median :0.39926
Mean :1.27987 Mean :0.458903 Mean :0.67316 Mean :0.49802
3rd Qu.:1.68578 3rd Qu.:0.578474 3rd Qu.:0.84844 3rd Qu.:0.65592
Max. :5.37435 Max. :2.229045 Max. :3.48037 Max. :2.16105
PROX_PRIMARY_SCH PROX_TOP_PRIMARY_SCH PROX_SHOPPING_MALL PROX_SUPERMARKET
Min. :0.07711 Min. :0.07711 Min. :0.0000 Min. :0.0000
1st Qu.:0.44024 1st Qu.:1.34451 1st Qu.:0.5258 1st Qu.:0.3695
Median :0.63505 Median :1.88213 Median :0.9357 Median :0.5687
Mean :0.75471 Mean :2.27347 Mean :1.0455 Mean :0.6141
3rd Qu.:0.95104 3rd Qu.:2.90954 3rd Qu.:1.3994 3rd Qu.:0.7862
Max. :3.92899 Max. :6.74819 Max. :3.4774 Max. :2.2441
PROX_BUS_STOP NO_Of_UNITS FAMILY_FRIENDLY FREEHOLD
Min. :0.001595 Min. : 18.0 Min. :0.0000 Min. :0.0000
1st Qu.:0.098356 1st Qu.: 188.8 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.151710 Median : 360.0 Median :0.0000 Median :0.0000
Mean :0.193974 Mean : 409.2 Mean :0.4868 Mean :0.4227
3rd Qu.:0.220466 3rd Qu.: 590.0 3rd Qu.:1.0000 3rd Qu.:1.0000
Max. :2.476639 Max. :1703.0 Max. :1.0000 Max. :1.0000
LEASEHOLD_99YR geometry
Min. :0.0000 POINT :1436
1st Qu.:0.0000 epsg:3414 : 0
Median :0.0000 +proj=tmer...: 0
Mean :0.4882
3rd Qu.:1.0000
Max. :1.0000
There are many variables so summary is not the best tool to explore them.
EDA using graph
EDA dependent variable
Our dependent variable is SELLING_PRICE, which we will inspect using the plot below.
# Histogram (20 bins) of the raw selling price, used to inspect the shape of
# the dependent variable's distribution before modelling.
condo_resale.sf %>%
  ggplot(mapping = aes(x = SELLING_PRICE)) +
  geom_histogram(bins = 20, colour = "black", fill = "light blue")
Because the distribution of SELLING_PRICE is highly right-skewed, we apply a log transformation to make it more symmetric.
condo_resale.sf <- condo_resale.sf %>%
mutate(`LOG_SELLING_PRICE` = log(SELLING_PRICE))EDA independent variable
Let’s plot multiple graphs to view the variables’ distribution.
# Build a 20-bin histogram for a single column of `data`.
#
# var:  column name, given as a string.
# data: data frame to plot from (defaults to condo_resale.sf).
#
# `.data[[var]]` looks the column up by name inside aes(), so one helper can
# serve every predictor; labs() pins the x-axis title to the column name.
plot_histogram <- function(var, data = condo_resale.sf) {
  ggplot(data = data, aes(x = .data[[var]])) +
    geom_histogram(bins = 20, color = "black", fill = "light blue") +
    labs(x = var)
}

# One plot object per predictor; the object names are kept as before.
AREA_SQM <- plot_histogram("AREA_SQM")
AGE <- plot_histogram("AGE")
PROX_CBD <- plot_histogram("PROX_CBD")
PROX_CHILDCARE <- plot_histogram("PROX_CHILDCARE")
PROX_ELDERLYCARE <- plot_histogram("PROX_ELDERLYCARE")
PROX_URA_GROWTH_AREA <- plot_histogram("PROX_URA_GROWTH_AREA")
PROX_HAWKER_MARKET <- plot_histogram("PROX_HAWKER_MARKET")
PROX_KINDERGARTEN <- plot_histogram("PROX_KINDERGARTEN")
PROX_MRT <- plot_histogram("PROX_MRT")
PROX_PARK <- plot_histogram("PROX_PARK")
PROX_PRIMARY_SCH <- plot_histogram("PROX_PRIMARY_SCH")
PROX_TOP_PRIMARY_SCH <- plot_histogram("PROX_TOP_PRIMARY_SCH")

# Lay the twelve histograms out on a grid of 3 columns by 4 rows.
ggarrange(AREA_SQM, AGE, PROX_CBD, PROX_CHILDCARE, PROX_ELDERLYCARE,
          PROX_URA_GROWTH_AREA, PROX_HAWKER_MARKET, PROX_KINDERGARTEN, PROX_MRT,
          PROX_PARK, PROX_PRIMARY_SCH, PROX_TOP_PRIMARY_SCH,
          ncol = 3, nrow = 4)
Drawing Statistical Point Map
tmap_mode("view")
tm_shape(mpsz)+
tm_polygons() +
tm_shape(condo_resale.sf) +
tm_dots(col = "SELLING_PRICE",
alpha = 0.6,
style="quantile") +
tm_view(set.zoom.limits = c(11,14)) +
tmap_options(check.and.fix = TRUE)tmap_mode("plot")Hedonic Pricing Modelling in R
Simple Linear Regression Method
To build a simple linear regression model with SELLING_PRICE as the dependent variable and AREA_SQM as the independent variable, we use the lm() function. It creates an lm object (lm stands for linear model), which is assigned to condo.slr.
condo.slr <- lm(formula=SELLING_PRICE ~ AREA_SQM, data = condo_resale.sf)There are many ways to examine lm objects. Here we use summary but clicking on the object’s name in the Environment tab will also do.
summary(condo.slr)
Call:
lm(formula = SELLING_PRICE ~ AREA_SQM, data = condo_resale.sf)
Residuals:
Min 1Q Median 3Q Max
-3695815 -391764 -87517 258900 13503875
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -258121.1 63517.2 -4.064 5.09e-05 ***
AREA_SQM 14719.0 428.1 34.381 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 942700 on 1434 degrees of freedom
Multiple R-squared: 0.4518, Adjusted R-squared: 0.4515
F-statistic: 1182 on 1 and 1434 DF, p-value: < 2.2e-16
There are three things to notice:
The R Square value is only 0.4518, meaning about 45% of the variation in selling price is explained by the floor area (AREA_SQM).
Since this model only contains one independent variable, we do not need to refer to the Adjusted R Square.
The p-values of the intercept (B0) and the slope coefficient (B1) are both less than 0.05. We therefore reject the null hypotheses that B0 = 0 and B1 = 0 at the 95% confidence level.
We can also visualize the regression line using the lm object and ggplot() function. Remember to put method = lm in geom_smooth().
# Scatter plot of selling price against floor area, with the fitted linear
# regression line drawn on top of the points.
condo_resale.sf %>%
  ggplot(mapping = aes(x = AREA_SQM, y = SELLING_PRICE)) +
  geom_point() +
  geom_smooth(method = lm)
Multiple Linear Regression Method
Checking for multicollinearity
When doing multiple linear regression, we need to check for multicollinearity between the independent variables and take care of the high correlation between them, if any.
# Correlation matrix of columns 3 to 21 (AREA_SQM through LEASEHOLD_99YR in the
# summary output), drawn as numbers in the upper triangle. The geometry column
# is dropped first because cor() needs a purely numeric table.
condo_resale.sf[, 3:21] %>%
  st_drop_geometry() %>%
  cor() %>%
  corrplot(method = "number", type = "upper", order = "AOE",
           diag = FALSE, tl.pos = "td", tl.cex = 0.5)
Matrix reordering is very important for mining the hidden structure and patterns in the matrix. There are four methods in corrplot (parameter order), named “AOE”, “FPC”, “hclust”, “alphabet”. In the code chunk above, AOE order is used. It orders the variables by using the angular order of the eigenvectors method suggested by Michael Friendly.
From the plot above, we can see that LEASEHOLD_99YR and FREEHOLD are highly correlated. We will not include LEASEHOLD_99YR in the modelling.
Building a hedonic pricing model using multiple linear regression method
# Global (non-spatial) hedonic pricing model: selling price regressed on the
# 18 structural and locational predictors. LEASEHOLD_99YR is deliberately
# left out of the formula.
condo.mlr <- lm(
  formula = SELLING_PRICE ~
    AREA_SQM +
    AGE +
    PROX_CBD +
    PROX_CHILDCARE +
    PROX_ELDERLYCARE +
    PROX_URA_GROWTH_AREA +
    PROX_HAWKER_MARKET +
    PROX_KINDERGARTEN +
    PROX_MRT +
    PROX_PARK +
    PROX_PRIMARY_SCH +
    PROX_TOP_PRIMARY_SCH +
    PROX_SHOPPING_MALL +
    PROX_SUPERMARKET +
    PROX_BUS_STOP +
    NO_Of_UNITS +
    FAMILY_FRIENDLY +
    FREEHOLD,
  data = condo_resale.sf
)

# Print coefficients, significance tests and fit statistics.
summary(condo.mlr)
Call:
lm(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD + PROX_CHILDCARE +
PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET +
PROX_KINDERGARTEN + PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
data = condo_resale.sf)
Residuals:
Min 1Q Median 3Q Max
-3475964 -293923 -23069 241043 12260381
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 481728.40 121441.01 3.967 7.65e-05 ***
AREA_SQM 12708.32 369.59 34.385 < 2e-16 ***
AGE -24440.82 2763.16 -8.845 < 2e-16 ***
PROX_CBD -78669.78 6768.97 -11.622 < 2e-16 ***
PROX_CHILDCARE -351617.91 109467.25 -3.212 0.00135 **
PROX_ELDERLYCARE 171029.42 42110.51 4.061 5.14e-05 ***
PROX_URA_GROWTH_AREA 38474.53 12523.57 3.072 0.00217 **
PROX_HAWKER_MARKET 23746.10 29299.76 0.810 0.41782
PROX_KINDERGARTEN 147468.99 82668.87 1.784 0.07466 .
PROX_MRT -314599.68 57947.44 -5.429 6.66e-08 ***
PROX_PARK 563280.50 66551.68 8.464 < 2e-16 ***
PROX_PRIMARY_SCH 180186.08 65237.95 2.762 0.00582 **
PROX_TOP_PRIMARY_SCH 2280.04 20410.43 0.112 0.91107
PROX_SHOPPING_MALL -206604.06 42840.60 -4.823 1.57e-06 ***
PROX_SUPERMARKET -44991.80 77082.64 -0.584 0.55953
PROX_BUS_STOP 683121.35 138353.28 4.938 8.85e-07 ***
NO_Of_UNITS -231.18 89.03 -2.597 0.00951 **
FAMILY_FRIENDLY 140340.77 47020.55 2.985 0.00289 **
FREEHOLD 359913.01 49220.22 7.312 4.38e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 755800 on 1417 degrees of freedom
Multiple R-squared: 0.6518, Adjusted R-squared: 0.6474
F-statistic: 147.4 on 18 and 1417 DF, p-value: < 2.2e-16
Preparing Publication Quality Table: olsrr method
The report above shows us the metrics we will use to analyze the quality of the model later, which are R Square, Adjusted R Square, p-values. However, it contains a lot of text and the layout is not very well organized. To address that, we will use the olsrr package.
Note: I don’t calculate condo.mlr1 because I want to explore the model with more independent variables.
ols_regress(condo.mlr) Model Summary
------------------------------------------------------------------------
R 0.807 RMSE 755816.386
R-Squared 0.652 Coef. Var 43.160
Adj. R-Squared 0.647 MSE 571258408962.149
Pred R-Squared 0.637 MAE 413425.809
------------------------------------------------------------------------
RMSE: Root Mean Square Error
MSE: Mean Square Error
MAE: Mean Absolute Error
ANOVA
--------------------------------------------------------------------------------
Sum of
Squares DF Mean Square F Sig.
--------------------------------------------------------------------------------
Regression 1.515174e+15 18 8.417631e+13 147.352 0.0000
Residual 8.094732e+14 1417 571258408962.149
Total 2.324647e+15 1435
--------------------------------------------------------------------------------
Parameter Estimates
-----------------------------------------------------------------------------------------------------------------
model Beta Std. Error Std. Beta t Sig lower upper
-----------------------------------------------------------------------------------------------------------------
(Intercept) 481728.405 121441.014 3.967 0.000 243504.909 719951.900
AREA_SQM 12708.324 369.590 0.580 34.385 0.000 11983.322 13433.326
AGE -24440.816 2763.164 -0.165 -8.845 0.000 -29861.148 -19020.484
PROX_CBD -78669.779 6768.972 -0.268 -11.622 0.000 -91948.061 -65391.496
PROX_CHILDCARE -351617.910 109467.252 -0.092 -3.212 0.001 -566353.201 -136882.619
PROX_ELDERLYCARE 171029.418 42110.506 0.083 4.061 0.000 88423.783 253635.053
PROX_URA_GROWTH_AREA 38474.534 12523.567 0.059 3.072 0.002 13907.809 63041.258
PROX_HAWKER_MARKET 23746.098 29299.755 0.019 0.810 0.418 -33729.461 81221.657
PROX_KINDERGARTEN 147468.986 82668.868 0.031 1.784 0.075 -14697.534 309635.506
PROX_MRT -314599.679 57947.441 -0.120 -5.429 0.000 -428271.672 -200927.687
PROX_PARK 563280.499 66551.675 0.148 8.464 0.000 432730.102 693830.897
PROX_PRIMARY_SCH 180186.083 65237.948 0.070 2.762 0.006 52212.744 308159.421
PROX_TOP_PRIMARY_SCH 2280.036 20410.435 0.002 0.112 0.911 -37757.880 42317.951
PROX_SHOPPING_MALL -206604.057 42840.595 -0.108 -4.823 0.000 -290641.863 -122566.252
PROX_SUPERMARKET -44991.803 77082.635 -0.012 -0.584 0.560 -196200.149 106216.542
PROX_BUS_STOP 683121.347 138353.278 0.134 4.938 0.000 411722.087 954520.608
NO_Of_UNITS -231.180 89.033 -0.050 -2.597 0.010 -405.830 -56.530
FAMILY_FRIENDLY 140340.770 47020.551 0.055 2.985 0.003 48103.399 232578.141
FREEHOLD 359913.008 49220.224 0.140 7.312 0.000 263360.671 456465.345
-----------------------------------------------------------------------------------------------------------------
Preparing Publication Quality Table: gtsummary method
Besides olsrr, we can also use the gtsummary package.
tbl_regression(condo.mlr, intercept = TRUE)| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| (Intercept) | 481,728 | 243,505, 719,952 | <0.001 |
| AREA_SQM | 12,708 | 11,983, 13,433 | <0.001 |
| AGE | -24,441 | -29,861, -19,020 | <0.001 |
| PROX_CBD | -78,670 | -91,948, -65,391 | <0.001 |
| PROX_CHILDCARE | -351,618 | -566,353, -136,883 | 0.001 |
| PROX_ELDERLYCARE | 171,029 | 88,424, 253,635 | <0.001 |
| PROX_URA_GROWTH_AREA | 38,475 | 13,908, 63,041 | 0.002 |
| PROX_HAWKER_MARKET | 23,746 | -33,729, 81,222 | 0.4 |
| PROX_KINDERGARTEN | 147,469 | -14,698, 309,636 | 0.075 |
| PROX_MRT | -314,600 | -428,272, -200,928 | <0.001 |
| PROX_PARK | 563,280 | 432,730, 693,831 | <0.001 |
| PROX_PRIMARY_SCH | 180,186 | 52,213, 308,159 | 0.006 |
| PROX_TOP_PRIMARY_SCH | 2,280 | -37,758, 42,318 | >0.9 |
| PROX_SHOPPING_MALL | -206,604 | -290,642, -122,566 | <0.001 |
| PROX_SUPERMARKET | -44,992 | -196,200, 106,217 | 0.6 |
| PROX_BUS_STOP | 683,121 | 411,722, 954,521 | <0.001 |
| NO_Of_UNITS | -231 | -406, -57 | 0.010 |
| FAMILY_FRIENDLY | 140,341 | 48,103, 232,578 | 0.003 |
| FREEHOLD | 359,913 | 263,361, 456,465 | <0.001 |
| 1 CI = Confidence Interval | |||
With gtsummary package, model statistics can be included in the report by either appending them to the report table by using add_glance_table() or adding as a table source note by using add_glance_source_note() as shown in the code chunk below.
tbl_regression(condo.mlr,
intercept = TRUE) %>%
add_glance_source_note(
label = list(sigma ~ "\U03C3"),
include = c(r.squared, adj.r.squared,
AIC, statistic,
p.value, sigma))| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| (Intercept) | 481,728 | 243,505, 719,952 | <0.001 |
| AREA_SQM | 12,708 | 11,983, 13,433 | <0.001 |
| AGE | -24,441 | -29,861, -19,020 | <0.001 |
| PROX_CBD | -78,670 | -91,948, -65,391 | <0.001 |
| PROX_CHILDCARE | -351,618 | -566,353, -136,883 | 0.001 |
| PROX_ELDERLYCARE | 171,029 | 88,424, 253,635 | <0.001 |
| PROX_URA_GROWTH_AREA | 38,475 | 13,908, 63,041 | 0.002 |
| PROX_HAWKER_MARKET | 23,746 | -33,729, 81,222 | 0.4 |
| PROX_KINDERGARTEN | 147,469 | -14,698, 309,636 | 0.075 |
| PROX_MRT | -314,600 | -428,272, -200,928 | <0.001 |
| PROX_PARK | 563,280 | 432,730, 693,831 | <0.001 |
| PROX_PRIMARY_SCH | 180,186 | 52,213, 308,159 | 0.006 |
| PROX_TOP_PRIMARY_SCH | 2,280 | -37,758, 42,318 | >0.9 |
| PROX_SHOPPING_MALL | -206,604 | -290,642, -122,566 | <0.001 |
| PROX_SUPERMARKET | -44,992 | -196,200, 106,217 | 0.6 |
| PROX_BUS_STOP | 683,121 | 411,722, 954,521 | <0.001 |
| NO_Of_UNITS | -231 | -406, -57 | 0.010 |
| FAMILY_FRIENDLY | 140,341 | 48,103, 232,578 | 0.003 |
| FREEHOLD | 359,913 | 263,361, 456,465 | <0.001 |
| R² = 0.652; Adjusted R² = 0.647; AIC = 42,970; Statistic = 147; p-value = <0.001; σ = 755,816 | |||
| 1 CI = Confidence Interval | |||
Checking for multicollinearity
olsrr can do more than just displaying tables. It can also be used to build regression models. Below is the list of what the package can do.
comprehensive regression output
residual diagnostics
measures of influence
heteroskedasticity tests
collinearity diagnostics
model fit assessment
variable contribution assessment
variable selection procedures
ols_vif_tol() is used to check for multicollinearity.
ols_vif_tol(condo.mlr) Variables Tolerance VIF
1 AREA_SQM 0.8625928 1.159296
2 AGE 0.7026139 1.423257
3 PROX_CBD 0.4605774 2.171188
4 PROX_CHILDCARE 0.2981029 3.354546
5 PROX_ELDERLYCARE 0.5922259 1.688545
6 PROX_URA_GROWTH_AREA 0.6614127 1.511915
7 PROX_HAWKER_MARKET 0.4373889 2.286295
8 PROX_KINDERGARTEN 0.8370845 1.194622
9 PROX_MRT 0.5049530 1.980382
10 PROX_PARK 0.8018396 1.247132
11 PROX_PRIMARY_SCH 0.3855782 2.593508
12 PROX_TOP_PRIMARY_SCH 0.4968645 2.012621
13 PROX_SHOPPING_MALL 0.4906426 2.038144
14 PROX_SUPERMARKET 0.6152063 1.625471
15 PROX_BUS_STOP 0.3320516 3.011580
16 NO_Of_UNITS 0.6731165 1.485627
17 FAMILY_FRIENDLY 0.7202230 1.388459
18 FREEHOLD 0.6729095 1.486084
No variable has a VIF greater than 10, so we can conclude that there is no sign of multicollinearity among the independent variables.
Testing for non-linearity
When doing regression model, we also need to test if the relationship between the dependent and independent variables are linear or not. We can do that using ols_plot_resid_fit().
ols_plot_resid_fit(condo.mlr)
Most of the residuals are scattered around the 0 line, so we can conclude that the relationship in concern is linear.
Testing for normality assumption
ols_plot_resid_hist(condo.mlr)
In the histogram, the residuals appear to roughly resemble a normal distribution. To check this formally, we can run the statistical normality tests provided by ols_test_normality().
ols_test_normality(condo.mlr)-----------------------------------------------
Test Statistic pvalue
-----------------------------------------------
Shapiro-Wilk 0.6836 0.0000
Kolmogorov-Smirnov 0.1388 0.0000
Cramer-von Mises 120.5692 0.0000
Anderson-Darling 68.3268 0.0000
-----------------------------------------------
For all four tests listed above, the p-value is less than 0.05. The null hypothesis of these tests is that the residuals are normally distributed, so we reject it and conclude that there is statistical evidence that the residuals are not normally distributed at the 5% significance level.
Testing for Spatial Autocorrelation
The hedonic model we are trying to build uses geographically referenced attributes, hence it is also important for us to visualise the residuals of the hedonic pricing model.
In order to perform spatial autocorrelation test, we need to convert condo_resale.sf from sf data frame into a SpatialPointsDataFrame.
mlr.output <- as.data.frame(condo.mlr$residuals)
condo_resale.res.sf <- cbind(condo_resale.sf,
condo.mlr$residuals) %>%
rename(`MLR_RES` = `condo.mlr.residuals`)
condo_resale.sp <- as_Spatial(condo_resale.res.sf)Now we can create an interactive map.
tmap_mode("view")
tm_shape(mpsz)+
tmap_options(check.and.fix = TRUE) +
tm_polygons(alpha = 0.4) +
tm_shape(condo_resale.res.sf) +
tm_dots(col = "MLR_RES",
alpha = 0.6,
style="quantile") +
tm_view(set.zoom.limits = c(11,14))
tmap_mode("plot")
We can see that there are some spots where high residual values tend to cluster together, which is a sign of spatial autocorrelation. We will use Moran’s I test to confirm this. The test below uses a distance-based spatial weight matrix (neighbours within 1,500 m, row-standardised), not a Queen contiguity matrix.
# Distance-band neighbours: every pair of condos between 0 and 1500 coordinate
# units apart (presumably metres, since the data were projected to EPSG:3414).
nb <- dnearneigh(
  x = coordinates(condo_resale.sp),
  d1 = 0,
  d2 = 1500,
  longlat = FALSE
)

# Turn the neighbour list into spatial weights using style "W".
nb_lw <- nb2listw(neighbours = nb, style = "W")

# Moran's I test for spatial autocorrelation in the residuals of condo.mlr.
lm.morantest(model = condo.mlr, listw = nb_lw)
Global Moran I for regression residuals
data:
model: lm(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD +
PROX_CHILDCARE + PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA +
PROX_HAWKER_MARKET + PROX_KINDERGARTEN + PROX_MRT + PROX_PARK +
PROX_PRIMARY_SCH + PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL +
PROX_SUPERMARKET + PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY +
FREEHOLD, data = condo_resale.sf)
weights: nb_lw
Moran I statistic standard deviate = 24.673, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Observed Moran I Expectation Variance
1.392527e-01 -7.134319e-03 3.520278e-05
The p-value of the test is less than 0.05, so we reject the null hypothesis that the residuals are randomly distributed in space. In addition, the observed Moran’s I is greater than 0, which indicates that the residuals show a clustered pattern.
Building Hedonic Pricing Models using GWmodel
Building Fixed Bandwidth GWR Model
We will use bw.gwr() to determine the optimal fixed bandwidth to use in the model. adaptive is set to FALSE to select the fixed bandwidth method. The cross-validation (CV) approach is used, so approach = "CV".
bw.fixed <- bw.gwr(formula = SELLING_PRICE ~ AREA_SQM + AGE +
PROX_CBD + PROX_CHILDCARE + PROX_ELDERLYCARE +
PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + PROX_KINDERGARTEN +
PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
data=condo_resale.sp,
approach="CV",
kernel="gaussian",
adaptive=FALSE,
longlat=FALSE)Fixed bandwidth: 17660.96 CV score: 8.235467e+14
Fixed bandwidth: 10917.26 CV score: 7.902384e+14
Fixed bandwidth: 6749.419 CV score: 7.152539e+14
Fixed bandwidth: 4173.553 CV score: 6.182116e+14
Fixed bandwidth: 2581.58 CV score: 5.257275e+14
Fixed bandwidth: 1597.687 CV score: 4.748442e+14
Fixed bandwidth: 989.6077 CV score: 5.095011e+14
Fixed bandwidth: 1973.501 CV score: 4.85724e+14
Fixed bandwidth: 1365.421 CV score: 4.766341e+14
Fixed bandwidth: 1741.235 CV score: 4.772231e+14
Fixed bandwidth: 1508.969 CV score: 4.745788e+14
Fixed bandwidth: 1454.139 CV score: 4.749631e+14
Fixed bandwidth: 1542.857 CV score: 4.7456e+14
Fixed bandwidth: 1563.8 CV score: 4.746245e+14
Fixed bandwidth: 1529.913 CV score: 4.745487e+14
Fixed bandwidth: 1521.913 CV score: 4.745531e+14
Fixed bandwidth: 1534.857 CV score: 4.745504e+14
Fixed bandwidth: 1526.857 CV score: 4.745494e+14
Fixed bandwidth: 1531.801 CV score: 4.74549e+14
Fixed bandwidth: 1528.746 CV score: 4.745488e+14
Fixed bandwidth: 1530.634 CV score: 4.745488e+14
Fixed bandwidth: 1529.467 CV score: 4.745487e+14
Fixed bandwidth: 1530.188 CV score: 4.745487e+14
Fixed bandwidth: 1529.743 CV score: 4.745487e+14
Fixed bandwidth: 1530.018 CV score: 4.745487e+14
Fixed bandwidth: 1529.848 CV score: 4.745487e+14
Fixed bandwidth: 1529.808 CV score: 4.745487e+14
Fixed bandwidth: 1529.873 CV score: 4.745487e+14
Fixed bandwidth: 1529.888 CV score: 4.745487e+14
Fixed bandwidth: 1529.863 CV score: 4.745487e+14
Fixed bandwidth: 1529.879 CV score: 4.745487e+14
Fixed bandwidth: 1529.869 CV score: 4.745487e+14
Fixed bandwidth: 1529.867 CV score: 4.745487e+14
Fixed bandwidth: 1529.87 CV score: 4.745487e+14
Fixed bandwidth: 1529.871 CV score: 4.745487e+14
Fixed bandwidth: 1529.87 CV score: 4.745487e+14
Fixed bandwidth: 1529.871 CV score: 4.745487e+14
Fixed bandwidth: 1529.87 CV score: 4.745487e+14
Fixed bandwidth: 1529.871 CV score: 4.745487e+14
Fixed bandwidth: 1529.87 CV score: 4.745487e+14
Fixed bandwidth: 1529.87 CV score: 4.745487e+14
Fixed bandwidth: 1529.87 CV score: 4.745487e+14
GWmodel method - fixed bandwidth
gwr.fixed <- gwr.basic(formula = SELLING_PRICE ~ AREA_SQM + AGE +
PROX_CBD + PROX_CHILDCARE + PROX_ELDERLYCARE +
PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + PROX_KINDERGARTEN +
PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
data=condo_resale.sp,
bw=bw.fixed,
kernel = 'gaussian',
longlat = FALSE)gwr.fixed ***********************************************************************
* Package GWmodel *
***********************************************************************
Program starts at: 2022-12-10 22:58:02
Call:
gwr.basic(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD +
PROX_CHILDCARE + PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA +
PROX_HAWKER_MARKET + PROX_KINDERGARTEN + PROX_MRT + PROX_PARK +
PROX_PRIMARY_SCH + PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL +
PROX_SUPERMARKET + PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY +
FREEHOLD, data = condo_resale.sp, bw = bw.fixed, kernel = "gaussian",
longlat = FALSE)
Dependent (y) variable: SELLING_PRICE
Independent variables: AREA_SQM AGE PROX_CBD PROX_CHILDCARE PROX_ELDERLYCARE PROX_URA_GROWTH_AREA PROX_HAWKER_MARKET PROX_KINDERGARTEN PROX_MRT PROX_PARK PROX_PRIMARY_SCH PROX_TOP_PRIMARY_SCH PROX_SHOPPING_MALL PROX_SUPERMARKET PROX_BUS_STOP NO_Of_UNITS FAMILY_FRIENDLY FREEHOLD
Number of data points: 1436
***********************************************************************
* Results of Global Regression *
***********************************************************************
Call:
lm(formula = formula, data = data)
Residuals:
Min 1Q Median 3Q Max
-3475964 -293923 -23069 241043 12260381
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 481728.40 121441.01 3.967 7.65e-05 ***
AREA_SQM 12708.32 369.59 34.385 < 2e-16 ***
AGE -24440.82 2763.16 -8.845 < 2e-16 ***
PROX_CBD -78669.78 6768.97 -11.622 < 2e-16 ***
PROX_CHILDCARE -351617.91 109467.25 -3.212 0.00135 **
PROX_ELDERLYCARE 171029.42 42110.51 4.061 5.14e-05 ***
PROX_URA_GROWTH_AREA 38474.53 12523.57 3.072 0.00217 **
PROX_HAWKER_MARKET 23746.10 29299.76 0.810 0.41782
PROX_KINDERGARTEN 147468.99 82668.87 1.784 0.07466 .
PROX_MRT -314599.68 57947.44 -5.429 6.66e-08 ***
PROX_PARK 563280.50 66551.68 8.464 < 2e-16 ***
PROX_PRIMARY_SCH 180186.08 65237.95 2.762 0.00582 **
PROX_TOP_PRIMARY_SCH 2280.04 20410.43 0.112 0.91107
PROX_SHOPPING_MALL -206604.06 42840.60 -4.823 1.57e-06 ***
PROX_SUPERMARKET -44991.80 77082.64 -0.584 0.55953
PROX_BUS_STOP 683121.35 138353.28 4.938 8.85e-07 ***
NO_Of_UNITS -231.18 89.03 -2.597 0.00951 **
FAMILY_FRIENDLY 140340.77 47020.55 2.985 0.00289 **
FREEHOLD 359913.01 49220.22 7.312 4.38e-13 ***
---Significance stars
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 755800 on 1417 degrees of freedom
Multiple R-squared: 0.6518
Adjusted R-squared: 0.6474
F-statistic: 147.4 on 18 and 1417 DF, p-value: < 2.2e-16
***Extra Diagnostic information
Residual sum of squares: 8.094732e+14
Sigma(hat): 751322.9
AIC: 42970.18
AICc: 42970.77
BIC: 41784.96
***********************************************************************
* Results of Geographically Weighted Regression *
***********************************************************************
*********************Model calibration information*********************
Kernel function: gaussian
Fixed bandwidth: 1529.87
Regression points: the same locations as observations are used.
Distance metric: Euclidean distance metric is used.
****************Summary of GWR coefficient estimates:******************
Min. 1st Qu. Median 3rd Qu.
Intercept -1.7294e+06 5.0686e+05 1.2459e+06 2.0286e+06
AREA_SQM 2.7257e+03 5.4383e+03 7.8679e+03 1.2293e+04
AGE -8.0810e+04 -2.5075e+04 -1.2338e+04 -5.6014e+03
PROX_CBD -1.4408e+06 -2.7205e+05 -1.7043e+05 -6.6519e+04
PROX_CHILDCARE -3.2769e+06 -2.2120e+05 -5.9600e+04 1.0788e+05
PROX_ELDERLYCARE -1.6966e+06 -3.5677e+04 8.9834e+04 2.0528e+05
PROX_URA_GROWTH_AREA -7.0723e+05 9.2127e+03 7.3288e+04 1.8958e+05
PROX_HAWKER_MARKET -6.4011e+05 -5.6009e+04 8.2983e+04 4.3080e+05
PROX_KINDERGARTEN -1.8548e+06 -3.5646e+05 -1.6452e+05 1.3895e+05
PROX_MRT -2.7453e+06 -6.3607e+05 -2.4833e+05 -7.1555e+04
PROX_PARK -1.0939e+06 -1.5591e+05 1.0240e+04 3.2499e+05
PROX_PRIMARY_SCH -6.2015e+05 -1.8895e+05 1.4868e+03 3.8470e+05
PROX_TOP_PRIMARY_SCH -7.6447e+05 -1.1127e+05 -1.7162e+04 5.2347e+04
PROX_SHOPPING_MALL -9.5378e+05 -1.6176e+05 -2.1287e+04 7.0973e+04
PROX_SUPERMARKET -7.2192e+05 -1.1000e+05 -5.9072e+03 1.5699e+05
PROX_BUS_STOP -6.2152e+05 4.5824e+04 4.3923e+05 1.5814e+06
NO_Of_UNITS -1.6083e+03 -2.9384e+02 -1.0619e+02 5.5081e+00
FAMILY_FRIENDLY -1.4636e+06 -4.4834e+04 1.4704e+04 1.7087e+05
FREEHOLD -1.4458e+05 8.0776e+04 1.8066e+05 3.4671e+05
Max.
Intercept 9517178.8
AREA_SQM 19022.4
AGE 34460.5
PROX_CBD 535635.5
PROX_CHILDCARE 1209089.3
PROX_ELDERLYCARE 2236812.0
PROX_URA_GROWTH_AREA 1511628.7
PROX_HAWKER_MARKET 2415026.9
PROX_KINDERGARTEN 782179.4
PROX_MRT 734567.9
PROX_PARK 1074304.2
PROX_PRIMARY_SCH 1472504.3
PROX_TOP_PRIMARY_SCH 883983.8
PROX_SHOPPING_MALL 595084.1
PROX_SUPERMARKET 1647456.7
PROX_BUS_STOP 5131416.8
NO_Of_UNITS 1483.4
FAMILY_FRIENDLY 1278255.4
FREEHOLD 885791.0
************************Diagnostic information*************************
Number of data points: 1436
Effective number of parameters (2trace(S) - trace(S'S)): 310.8448
Effective degrees of freedom (n-2trace(S) + trace(S'S)): 1125.155
AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 42237.55
AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 41869.48
BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42030.5
Residual sum of squares: 3.238745e+14
R-square value: 0.860678
Adjusted R-square value: 0.8221535
***********************************************************************
Program stops at: 2022-12-10 22:58:04
The AICc of the geographically weighted regression model is 42237.55, which is lower than the AICc of the global regression model (42970.77), indicating that the GWR model fits the data better.
Building Adaptive Bandwidth GWR Model
Computing the adaptive bandwidth
The step here is similar to the steps in fixed bandwidth GWR but adaptive = TRUE.
bw.adaptive <- bw.gwr(formula = SELLING_PRICE ~ AREA_SQM + AGE +
PROX_CBD + PROX_CHILDCARE + PROX_ELDERLYCARE +
PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + PROX_KINDERGARTEN +
PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
data=condo_resale.sp,
approach="CV",
kernel="gaussian",
adaptive=TRUE,
longlat=FALSE)Adaptive bandwidth: 895 CV score: 7.892714e+14
Adaptive bandwidth: 561 CV score: 7.538184e+14
Adaptive bandwidth: 354 CV score: 6.768593e+14
Adaptive bandwidth: 226 CV score: 5.979758e+14
Adaptive bandwidth: 147 CV score: 5.499621e+14
Adaptive bandwidth: 98 CV score: 5.282287e+14
Adaptive bandwidth: 68 CV score: 5.033227e+14
Adaptive bandwidth: 49 CV score: 4.744074e+14
Adaptive bandwidth: 37 CV score: 4.608087e+14
Adaptive bandwidth: 30 CV score: 4.452109e+14
Adaptive bandwidth: 25 CV score: 4.584895e+14
Adaptive bandwidth: 32 CV score: 4.506915e+14
Adaptive bandwidth: 27 CV score: 4.594263e+14
Adaptive bandwidth: 30 CV score: 4.452109e+14
The result shows that 30 is the recommended number of data points (nearest neighbours) to use as the adaptive bandwidth.
Constructing the adaptive bandwidth gwr model
gwr.adaptive <- gwr.basic(formula = SELLING_PRICE ~ AREA_SQM + AGE +
PROX_CBD + PROX_CHILDCARE + PROX_ELDERLYCARE +
PROX_URA_GROWTH_AREA + PROX_HAWKER_MARKET + PROX_KINDERGARTEN +
PROX_MRT + PROX_PARK + PROX_PRIMARY_SCH +
PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL + PROX_SUPERMARKET +
PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY + FREEHOLD,
data=condo_resale.sp,
bw=bw.adaptive,
kernel = 'gaussian',
adaptive = TRUE,
longlat = FALSE)gwr.adaptive ***********************************************************************
* Package GWmodel *
***********************************************************************
Program starts at: 2022-12-10 22:58:13
Call:
gwr.basic(formula = SELLING_PRICE ~ AREA_SQM + AGE + PROX_CBD +
PROX_CHILDCARE + PROX_ELDERLYCARE + PROX_URA_GROWTH_AREA +
PROX_HAWKER_MARKET + PROX_KINDERGARTEN + PROX_MRT + PROX_PARK +
PROX_PRIMARY_SCH + PROX_TOP_PRIMARY_SCH + PROX_SHOPPING_MALL +
PROX_SUPERMARKET + PROX_BUS_STOP + NO_Of_UNITS + FAMILY_FRIENDLY +
FREEHOLD, data = condo_resale.sp, bw = bw.adaptive, kernel = "gaussian",
adaptive = TRUE, longlat = FALSE)
Dependent (y) variable: SELLING_PRICE
Independent variables: AREA_SQM AGE PROX_CBD PROX_CHILDCARE PROX_ELDERLYCARE PROX_URA_GROWTH_AREA PROX_HAWKER_MARKET PROX_KINDERGARTEN PROX_MRT PROX_PARK PROX_PRIMARY_SCH PROX_TOP_PRIMARY_SCH PROX_SHOPPING_MALL PROX_SUPERMARKET PROX_BUS_STOP NO_Of_UNITS FAMILY_FRIENDLY FREEHOLD
Number of data points: 1436
***********************************************************************
* Results of Global Regression *
***********************************************************************
Call:
lm(formula = formula, data = data)
Residuals:
Min 1Q Median 3Q Max
-3475964 -293923 -23069 241043 12260381
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 481728.40 121441.01 3.967 7.65e-05 ***
AREA_SQM 12708.32 369.59 34.385 < 2e-16 ***
AGE -24440.82 2763.16 -8.845 < 2e-16 ***
PROX_CBD -78669.78 6768.97 -11.622 < 2e-16 ***
PROX_CHILDCARE -351617.91 109467.25 -3.212 0.00135 **
PROX_ELDERLYCARE 171029.42 42110.51 4.061 5.14e-05 ***
PROX_URA_GROWTH_AREA 38474.53 12523.57 3.072 0.00217 **
PROX_HAWKER_MARKET 23746.10 29299.76 0.810 0.41782
PROX_KINDERGARTEN 147468.99 82668.87 1.784 0.07466 .
PROX_MRT -314599.68 57947.44 -5.429 6.66e-08 ***
PROX_PARK 563280.50 66551.68 8.464 < 2e-16 ***
PROX_PRIMARY_SCH 180186.08 65237.95 2.762 0.00582 **
PROX_TOP_PRIMARY_SCH 2280.04 20410.43 0.112 0.91107
PROX_SHOPPING_MALL -206604.06 42840.60 -4.823 1.57e-06 ***
PROX_SUPERMARKET -44991.80 77082.64 -0.584 0.55953
PROX_BUS_STOP 683121.35 138353.28 4.938 8.85e-07 ***
NO_Of_UNITS -231.18 89.03 -2.597 0.00951 **
FAMILY_FRIENDLY 140340.77 47020.55 2.985 0.00289 **
FREEHOLD 359913.01 49220.22 7.312 4.38e-13 ***
---Significance stars
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 755800 on 1417 degrees of freedom
Multiple R-squared: 0.6518
Adjusted R-squared: 0.6474
F-statistic: 147.4 on 18 and 1417 DF, p-value: < 2.2e-16
***Extra Diagnostic information
Residual sum of squares: 8.094732e+14
Sigma(hat): 751322.9
AIC: 42970.18
AICc: 42970.77
BIC: 41784.96
***********************************************************************
* Results of Geographically Weighted Regression *
***********************************************************************
*********************Model calibration information*********************
Kernel function: gaussian
Adaptive bandwidth: 30 (number of nearest neighbours)
Regression points: the same locations as observations are used.
Distance metric: Euclidean distance metric is used.
****************Summary of GWR coefficient estimates:******************
Min. 1st Qu. Median 3rd Qu.
Intercept -1.6028e+08 -5.4344e+05 9.6211e+05 1.8060e+06
AREA_SQM 3.1331e+03 5.6904e+03 7.7690e+03 1.2440e+04
AGE -9.9093e+04 -3.0879e+04 -1.3909e+04 -6.4137e+03
PROX_CBD -2.0014e+07 -3.0226e+05 -1.1138e+05 -3.9884e+04
PROX_CHILDCARE -1.1390e+06 -1.7672e+05 -7.6155e+03 4.0347e+05
PROX_ELDERLYCARE -3.2078e+06 -1.4195e+05 8.7037e+04 2.7724e+05
PROX_URA_GROWTH_AREA -2.0846e+07 -1.4344e+04 8.3095e+04 3.9801e+05
PROX_HAWKER_MARKET -2.0295e+06 -1.1077e+05 9.3663e+04 5.5331e+05
PROX_KINDERGARTEN -1.8346e+06 -3.1986e+05 -2.3020e+04 2.1987e+05
PROX_MRT -2.6114e+07 -9.2429e+05 -2.6791e+05 -4.2711e+04
PROX_PARK -4.1401e+06 -2.0343e+05 6.3772e+04 4.4498e+05
PROX_PRIMARY_SCH -1.3511e+06 -2.2819e+05 -2.6660e+04 4.8742e+05
PROX_TOP_PRIMARY_SCH -5.6507e+06 -1.9060e+05 -1.5224e+04 7.1469e+04
PROX_SHOPPING_MALL -1.3105e+06 -1.4062e+05 -1.6413e+04 1.4532e+05
PROX_SUPERMARKET -3.0962e+06 -3.1361e+05 -5.2868e+04 1.0679e+05
PROX_BUS_STOP -2.1838e+06 -3.8619e+04 4.3163e+05 1.3596e+06
NO_Of_UNITS -2.6839e+03 -2.7312e+02 -7.2995e+01 4.4365e+01
FAMILY_FRIENDLY -6.0104e+05 -6.9317e+04 1.3347e+04 2.4512e+05
FREEHOLD -2.8864e+05 4.8910e+04 1.7121e+05 3.6600e+05
Max.
Intercept 2.7054e+07
AREA_SQM 2.3811e+04
AGE 1.0352e+04
PROX_CBD 2.3502e+07
PROX_CHILDCARE 3.2146e+06
PROX_ELDERLYCARE 2.3324e+06
PROX_URA_GROWTH_AREA 2.0730e+07
PROX_HAWKER_MARKET 5.1540e+06
PROX_KINDERGARTEN 2.4536e+06
PROX_MRT 1.2674e+06
PROX_PARK 3.3645e+06
PROX_PRIMARY_SCH 2.9412e+06
PROX_TOP_PRIMARY_SCH 1.4177e+07
PROX_SHOPPING_MALL 1.5711e+07
PROX_SUPERMARKET 1.5599e+06
PROX_BUS_STOP 1.2288e+07
NO_Of_UNITS 6.6179e+02
FAMILY_FRIENDLY 2.0840e+06
FREEHOLD 1.8127e+06
************************Diagnostic information*************************
Number of data points: 1436
Effective number of parameters (2trace(S) - trace(S'S)): 402.3843
Effective degrees of freedom (n-2trace(S) + trace(S'S)): 1033.616
AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 41993.54
AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 41449.96
BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42115.63
Residual sum of squares: 2.286415e+14
R-square value: 0.9016446
Adjusted R-square value: 0.863318
***********************************************************************
Program stops at: 2022-12-10 22:58:15
The AICc of the adaptive-bandwidth geographically weighted regression is 41993.54, which is lower than the global regression’s AICc of 42970.77.
Converting SDF into sf data.frame
The adaptive gwr model has the lowest AICc of them all. Therefore, we will visualize it.
To visualise the fields in SDF, we need to first convert it into an sf data.frame.
# Convert the GWR output (SDF) into an sf object in SVY21 (EPSG:3414).
# A single st_transform() is sufficient: transforming the result to the same
# CRS a second time would not change it.
condo_resale.sf.adaptive.svy21 <- st_as_sf(gwr.adaptive$SDF) %>%
  st_transform(crs = 3414)

# Flatten the SDF into a plain data frame and bind its columns (local
# coefficients, standard errors, t-values, Local_R2, ...) onto the condo resale
# sf data frame. This is the only assignment to condo_resale.sf.adaptive, so no
# intermediate value is created just to be overwritten.
gwr.adaptive.output <- as.data.frame(gwr.adaptive$SDF)
condo_resale.sf.adaptive <- cbind(
  condo_resale.res.sf,
  as.matrix(gwr.adaptive.output)
)

# Inspect the combined table.
glimpse(condo_resale.sf.adaptive)
Rows: 1,436
Columns: 89
$ POSTCODE <dbl> 118635, 288420, 267833, 258380, 467169, 466472…
$ SELLING_PRICE <dbl> 3000000, 3880000, 3325000, 4250000, 1400000, 1…
$ AREA_SQM <dbl> 309, 290, 248, 127, 145, 139, 218, 141, 165, 1…
$ AGE <dbl> 30, 32, 33, 7, 28, 22, 24, 24, 27, 31, 17, 22,…
$ PROX_CBD <dbl> 7.941259, 6.609797, 6.898000, 4.038861, 11.783…
$ PROX_CHILDCARE <dbl> 0.16597932, 0.28027246, 0.42922669, 0.39473543…
$ PROX_ELDERLYCARE <dbl> 2.5198118, 1.9333338, 0.5021395, 1.9910316, 1.…
$ PROX_URA_GROWTH_AREA <dbl> 6.618741, 7.505109, 6.463887, 4.906512, 6.4106…
$ PROX_HAWKER_MARKET <dbl> 1.76542207, 0.54507614, 0.37789301, 1.68259969…
$ PROX_KINDERGARTEN <dbl> 0.05835552, 0.61592412, 0.14120309, 0.38200076…
$ PROX_MRT <dbl> 0.5607188, 0.6584461, 0.3053433, 0.6910183, 0.…
$ PROX_PARK <dbl> 1.1710446, 0.1992269, 0.2779886, 0.9832843, 0.…
$ PROX_PRIMARY_SCH <dbl> 1.6340256, 0.9747834, 1.4715016, 1.4546324, 0.…
$ PROX_TOP_PRIMARY_SCH <dbl> 3.3273195, 0.9747834, 1.4715016, 2.3006394, 0.…
$ PROX_SHOPPING_MALL <dbl> 2.2102717, 2.9374279, 1.2256850, 0.3525671, 1.…
$ PROX_SUPERMARKET <dbl> 0.9103958, 0.5900617, 0.4135583, 0.4162219, 0.…
$ PROX_BUS_STOP <dbl> 0.10336166, 0.28673408, 0.28504777, 0.29872340…
$ NO_Of_UNITS <dbl> 18, 20, 27, 30, 30, 31, 32, 32, 32, 32, 34, 34…
$ FAMILY_FRIENDLY <dbl> 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0…
$ FREEHOLD <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1…
$ LEASEHOLD_99YR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ LOG_SELLING_PRICE <dbl> 14.91412, 15.17135, 15.01698, 15.26243, 14.151…
$ MLR_RES <dbl> -1442408.78, 389648.30, 221790.82, 1087048.92,…
$ Intercept <dbl> 2768698.98, 4398265.21, 2605216.84, 84393.17, …
$ AREA_SQM.1 <dbl> 9272.004, 16214.948, 12747.798, 20868.138, 671…
$ AGE.1 <dbl> -11593.551, -47856.312, -24847.155, -97346.332…
$ PROX_CBD.1 <dbl> -160081.00, -288927.21, -242886.78, -281850.14…
$ PROX_CHILDCARE.1 <dbl> 75553.025, 671015.741, -590017.922, 184972.622…
$ PROX_ELDERLYCARE.1 <dbl> -121956.742, 603472.239, 847742.589, -63177.10…
$ PROX_URA_GROWTH_AREA.1 <dbl> -184786.98, -276295.72, -25208.76, -56506.88, …
$ PROX_HAWKER_MARKET.1 <dbl> 268889.46, 596904.66, 569650.53, 1261821.05, 8…
$ PROX_KINDERGARTEN.1 <dbl> 123461.86, -470509.83, -70054.35, -1115075.03,…
$ PROX_MRT.1 <dbl> -362362.010, -2101706.434, -1160810.281, -2423…
$ PROX_PARK.1 <dbl> -328874.63, 80854.05, 317952.19, 47522.12, 108…
$ PROX_PRIMARY_SCH.1 <dbl> 535160.687, 1330164.374, 977828.079, 1886073.8…
$ PROX_TOP_PRIMARY_SCH.1 <dbl> -204772.23, -944577.89, -563446.78, -451298.30…
$ PROX_SHOPPING_MALL.1 <dbl> 96331.950, -552287.901, -739489.566, 172153.49…
$ PROX_SUPERMARKET.1 <dbl> -316646.820, -95812.393, 474130.208, -627179.9…
$ PROX_BUS_STOP.1 <dbl> 1270824.454, 2092787.882, 1235238.025, 8492969…
$ NO_Of_UNITS.1 <dbl> 192.115888, -109.054272, -15.449728, -75.20922…
$ FAMILY_FRIENDLY.1 <dbl> -47759.96, 300279.69, -54089.77, 1617558.95, 1…
$ FREEHOLD.1 <dbl> 357765.43, 537348.66, 79654.29, 977814.22, 309…
$ y <dbl> 3000000, 3880000, 3325000, 4250000, 1400000, 1…
$ yhat <dbl> 3000817.6, 3440670.4, 3550886.0, 5671634.3, 13…
$ residual <dbl> -817.607, 439329.595, -225885.991, -1421634.32…
$ CV_Score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Stud_residual <dbl> -0.00398958, 1.11709144, -0.75098689, -3.51956…
$ Intercept_SE <dbl> 450686.8, 598911.2, 968524.8, 500366.6, 337300…
$ AREA_SQM_SE <dbl> 822.7387, 818.3671, 990.2089, 608.4580, 1360.8…
$ AGE_SE <dbl> 6123.568, 6091.919, 6284.537, 5912.757, 8069.9…
$ PROX_CBD_SE <dbl> 35701.74, 29767.90, 57745.47, 405765.10, 63742…
$ PROX_CHILDCARE_SE <dbl> 347907.5, 327242.7, 350890.5, 364739.8, 718039…
$ PROX_ELDERLYCARE_SE <dbl> 192340.90, 95841.89, 147228.39, 158423.68, 403…
$ PROX_URA_GROWTH_AREA_SE <dbl> 55569.90, 72876.90, 100941.52, 414548.44, 6872…
$ PROX_HAWKER_MARKET_SE <dbl> 183479.6, 115799.2, 171490.4, 215119.1, 237856…
$ PROX_KINDERGARTEN_SE <dbl> 332018.6, 208196.6, 354059.4, 178900.4, 575983…
$ PROX_MRT_SE <dbl> 200360.6, 264614.9, 282378.2, 291083.2, 400947…
$ PROX_PARK_SE <dbl> 218006.3, 245460.0, 368800.2, 249147.3, 454475…
$ PROX_PRIMARY_SCH_SE <dbl> 150239.8, 171107.7, 224293.3, 268702.4, 363219…
$ PROX_TOP_PRIMARY_SCH_SE <dbl> 90612.50, 116036.81, 169306.50, 123466.16, 282…
$ PROX_SHOPPING_MALL_SE <dbl> 220250.2, 105003.4, 164026.2, 205600.1, 297965…
$ PROX_SUPERMARKET_SE <dbl> 366145.2, 231139.1, 213020.8, 300174.0, 304082…
$ PROX_BUS_STOP_SE <dbl> 642590.0, 410362.6, 468910.8, 610206.4, 768059…
$ NO_Of_UNITS_SE <dbl> 238.9019, 207.9118, 214.4963, 353.7506, 329.74…
$ FAMILY_FRIENDLY_SE <dbl> 133460.38, 110038.49, 154399.68, 107806.70, 15…
$ FREEHOLD_SE <dbl> 111520.88, 131923.99, 145975.38, 134028.31, 21…
$ Intercept_TV <dbl> 6.14328795, 7.34376865, 2.68988139, 0.16866270…
$ AREA_SQM_TV <dbl> 11.269683, 19.813782, 12.873846, 34.296760, 4.…
$ AGE_TV <dbl> -1.8932674, -7.8557044, -3.9536972, -16.463780…
$ PROX_CBD_TV <dbl> -4.48384369, -9.70599963, -4.20616167, -0.6946…
$ PROX_CHILDCARE_TV <dbl> 0.21716410, 2.05051393, -1.68148738, 0.5071358…
$ PROX_ELDERLYCARE_TV <dbl> -0.634065559, 6.296539138, 5.758010155, -0.398…
$ PROX_URA_GROWTH_AREA_TV <dbl> -3.32530713, -3.79126627, -0.24973626, -0.1363…
$ PROX_HAWKER_MARKET_TV <dbl> 1.46550096, 5.15465184, 3.32176393, 5.86568583…
$ PROX_KINDERGARTEN_TV <dbl> 0.37185225, -2.25993044, -0.19786046, -6.23293…
$ PROX_MRT_TV <dbl> -1.80854965, -7.94250970, -4.11083538, -8.3256…
$ PROX_PARK_TV <dbl> -1.5085553, 0.3293981, 0.8621258, 0.1907391, 0…
$ PROX_PRIMARY_SCH_TV <dbl> 3.562042617, 7.773842102, 4.359594852, 7.01919…
$ PROX_TOP_PRIMARY_SCH_TV <dbl> -2.25986739, -8.14032946, -3.32796898, -3.6552…
$ PROX_SHOPPING_MALL_TV <dbl> 0.43737500, -5.25971310, -4.50836228, 0.837321…
$ PROX_SUPERMARKET_TV <dbl> -0.86481222, -0.41452261, 2.22574657, -2.08938…
$ PROX_BUS_STOP_TV <dbl> 1.977659938, 5.099851002, 2.634270689, 13.9181…
$ NO_Of_UNITS_TV <dbl> 0.80416237, -0.52452177, -0.07202795, -0.21260…
$ FAMILY_FRIENDLY_TV <dbl> -0.3578587, 2.7288605, -0.3503231, 15.0042528,…
$ FREEHOLD_TV <dbl> 3.2080579, 4.0731688, 0.5456693, 7.2955797, 1.…
$ Local_R2 <dbl> 0.9055980, 0.8912740, 0.9029717, 0.9167747, 0.…
$ coords.x1 <dbl> 22085.12, 25656.84, 23963.99, 27044.28, 41042.…
$ coords.x2 <dbl> 29951.54, 34546.20, 32890.80, 32319.77, 33743.…
$ geometry <POINT [m]> POINT (22085.12 29951.54), POINT (25656.…
# Summary statistics of the fitted values (yhat) stored in the GWR output SDF.
summary(gwr.adaptive$SDF$yhat) Min. 1st Qu. Median Mean 3rd Qu. Max.
398408 1098831 1377586 1753125 1984196 13871610
Visualizing local R2
# Interactive map of the Local_R2 values at each condo location, drawn over
# the semi-transparent subzone polygons.
tmap_mode("view")
tm_shape(mpsz) +
  tm_polygons(alpha = 0.1) +
  tm_shape(condo_resale.sf.adaptive) +
  tm_dots(
    col = "Local_R2",
    border.col = "gray60",
    border.lwd = 1
  ) +
  tm_view(set.zoom.limits = c(11, 14))
# Switch tmap back to static plotting mode.
tmap_mode("plot")
Visualising coefficient estimates
By using sync = TRUE in tmap_arrange(), we can display two maps side by side with synchronous interactions.
# Build two interactive maps for the AREA_SQM coefficient: its standard error
# (AREA_SQM_SE) and its t-value (AREA_SQM_TV).
tmap_mode("view")

AREA_SQM_SE <- tm_shape(mpsz) +
  tm_polygons(alpha = 0.1) +
  tm_shape(condo_resale.sf.adaptive) +
  tm_dots(
    col = "AREA_SQM_SE",
    border.col = "gray60",
    border.lwd = 1
  ) +
  tm_view(set.zoom.limits = c(11, 14))

AREA_SQM_TV <- tm_shape(mpsz) +
  tm_polygons(alpha = 0.1) +
  tm_shape(condo_resale.sf.adaptive) +
  tm_dots(
    col = "AREA_SQM_TV",
    border.col = "gray60",
    border.lwd = 1
  ) +
  tm_view(set.zoom.limits = c(11, 14))

# Show both maps side by side; sync = TRUE links their pan/zoom interactions.
tmap_arrange(
  AREA_SQM_SE, AREA_SQM_TV,
  asp = 1,
  ncol = 2,
  sync = TRUE
)
# Switch tmap back to static plotting mode.
tmap_mode("plot")
By URA Planning Region
# Plot Local_R2 as bubbles over the subzones whose REGION_N is
# "CENTRAL REGION".
tm_shape(mpsz[mpsz$REGION_N == "CENTRAL REGION", ]) +
  tm_polygons() +
  tm_shape(condo_resale.sf.adaptive) +
  tm_bubbles(
    col = "Local_R2",
    size = 0.15,
    border.col = "gray60",
    border.lwd = 1
  )
# Make sure tmap is left in static plotting mode.
tmap_mode("plot")